In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
In [3]:
sns.set(style="whitegrid")
In [4]:
df = sns.load_dataset("iris")
df.head()
Out[4]:
| sepal_length | sepal_width | petal_length | petal_width | species | |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
In [5]:
df.describe()
Out[5]:
| sepal_length | sepal_width | petal_length | petal_width | |
|---|---|---|---|---|
| count | 150.000000 | 150.000000 | 150.000000 | 150.000000 |
| mean | 5.843333 | 3.057333 | 3.758000 | 1.199333 |
| std | 0.828066 | 0.435866 | 1.765298 | 0.762238 |
| min | 4.300000 | 2.000000 | 1.000000 | 0.100000 |
| 25% | 5.100000 | 2.800000 | 1.600000 | 0.300000 |
| 50% | 5.800000 | 3.000000 | 4.350000 | 1.300000 |
| 75% | 6.400000 | 3.300000 | 5.100000 | 1.800000 |
| max | 7.900000 | 4.400000 | 6.900000 | 2.500000 |
In [6]:
np_array = df.select_dtypes(include=[float]).values
np_array[:5]
Out[6]:
array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2]])
In [7]:
means = np.mean(np_array, axis=0)
stds = np.std(np_array, axis=0)
means, stds
Out[7]:
(array([5.84333333, 3.05733333, 3.758 , 1.19933333]), array([0.82530129, 0.43441097, 1.75940407, 0.75969263]))
In [8]:
plt.hist(df["sepal_length"])
plt.title("Distribution des longueurs de sépale")
plt.xlabel("Longueur")
plt.ylabel("Fréquence")
plt.show()
In [9]:
plt.plot(df["sepal_length"])
plt.title("Longueur de sépale")
plt.xlabel("Index")
plt.ylabel("Longueur")
plt.show()
In [10]:
sns.pairplot(df, hue="species")
plt.show()
In [11]:
plt.figure(figsize=(8,6))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
plt.title("Correlation Heatmap")
plt.show()
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[11], line 2 1 plt.figure(figsize=(8,6)) ----> 2 sns.heatmap(df.corr(), annot=True, cmap="coolwarm") 3 plt.title("Correlation Heatmap") 4 plt.show() File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:11049, in DataFrame.corr(self, method, min_periods, numeric_only) 11047 cols = data.columns 11048 idx = cols.copy() > 11049 mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False) 11051 if method == "pearson": 11052 correl = libalgos.nancorr(mat, minp=min_periods) File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:1993, in DataFrame.to_numpy(self, dtype, copy, na_value) 1991 if dtype is not None: 1992 dtype = np.dtype(dtype) -> 1993 result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value) 1994 if result.dtype is not dtype: 1995 result = np.asarray(result, dtype=dtype) File ~\anaconda3\Lib\site-packages\pandas\core\internals\managers.py:1694, in BlockManager.as_array(self, dtype, copy, na_value) 1692 arr.flags.writeable = False 1693 else: -> 1694 arr = self._interleave(dtype=dtype, na_value=na_value) 1695 # The underlying data was copied within _interleave, so no need 1696 # to further copy if copy=True or setting na_value 1698 if na_value is lib.no_default: File ~\anaconda3\Lib\site-packages\pandas\core\internals\managers.py:1753, in BlockManager._interleave(self, dtype, na_value) 1751 else: 1752 arr = blk.get_values(dtype) -> 1753 result[rl.indexer] = arr 1754 itemmask[rl.indexer] = 1 1756 if not itemmask.all(): ValueError: could not convert string to float: 'setosa'
<Figure size 800x600 with 0 Axes>
In [12]:
fig = px.scatter(
df, x="sepal_length", y="sepal_width",
color="species", title="Plotly Scatter – Iris"
)
fig.show()
In [13]:
fig = px.scatter_3d(
df,
x="sepal_length",
y="sepal_width",
z="petal_length",
color="species",
title="Iris – 3D Plot"
)
fig.show()
In [14]:
# Charger un dataset Kaggle ou un CSV local
titanic = sns.load_dataset("titanic")
titanic.head()
Out[14]:
| survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
| 1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
| 2 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S | Third | woman | False | NaN | Southampton | yes | True |
| 3 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S | First | woman | False | C | Southampton | yes | False |
| 4 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S | Third | man | True | NaN | Southampton | no | True |
In [15]:
titanic = titanic.dropna(subset=["age"])
In [16]:
sns.barplot(data=titanic, x="class", y="age", hue="sex")
plt.title("Âge moyen par classe – Titanic")
plt.show()
In [17]:
plt.boxplot(titanic["age"])
plt.title("Distribution âges – Titanic")
plt.show()
In [18]:
fig = px.sunburst(
titanic,
path=["class", "sex", "survived"],
values="age",
title="Sunburst – Titanic"
)
fig.show()
C:\Users\mee\anaconda3\Lib\site-packages\plotly\express\_core.py:1727: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. C:\Users\mee\anaconda3\Lib\site-packages\plotly\express\_core.py:1727: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. C:\Users\mee\anaconda3\Lib\site-packages\plotly\express\_core.py:1727: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
In [ ]: